library(tidyverse) # for data cleaning and plotting
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate) # for date manipulation
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(openintro) # for the abbr2state() function
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(palmerpenguins)# for Palmer penguin data
library(maps) # for map data
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(ggmap) # for mapping points on maps
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(gplots) # for col2hex() function
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(RColorBrewer) # for color palettes
library(sf) # for working with spatial data
## Linking to GEOS 3.9.1, GDAL 3.2.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(leaflet) # for highly customizable mapping
library(carData) # for Minneapolis police stops data
library(ggthemes) # for more themes (including theme_map())
theme_set(theme_minimal())
# Starbucks store locations (one row per store, with Longitude/Latitude)
Starbucks <- read_csv(
  file = "https://www.macalester.edu/~ajohns24/Data/Starbucks.csv"
)
## Rows: 25600 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (11): Brand, Store Number, Store Name, Ownership Type, Street Address, C...
## dbl (2): Longitude, Latitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Count US stores per state abbreviation, then add the lower-case full
# state name (state_name) that later joins use as their key.
starbucks_us_by_state <- Starbucks %>%
  filter(Country == "US") %>%
  count(`State/Province`) %>%
  mutate(
    state_name = `State/Province` %>%
      abbr2state() %>%
      str_to_lower()
  )
# Lisa's favorite St. Paul places - a template for building your own data.
# Written row-wise so each place sits next to its own coordinates.
favorite_stp_by_lisa <- tribble(
  ~place,                    ~long,       ~lat,
  "Home",                    -93.1405743, 44.950576,
  "Macalester College",      -93.1712321, 44.9378965,
  "Adams Spanish Immersion", -93.1451796, 44.9237914,
  "Spirit Gymnastics",       -93.1650563, 44.9654609,
  "Bama & Bapa",             -93.1542883, 44.9295072,
  "Now Bikes",               -93.1696608, 44.9436813,
  "Dance Spectrum",          -93.1393172, 44.9399922,
  "Pizza Luce",              -93.1524256, 44.9468848,
  "Brunson's",               -93.0753863, 44.9700727
)
# COVID-19 data from the New York Times: one row per state and date,
# with case and death counts.
covid19 <- read_csv(
  file = "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv"
)
## Rows: 40606 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): state, fips
## dbl (2): cases, deaths
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
If you were not able to get set up on GitHub last week, go here and get set up first. Then, do the following (if you get stuck on a step, don’t worry, I will help! You can always get started on the homework and we can figure out the GitHub piece later):
Put your name at the top of the document.
For ALL graphs, you should include appropriate labels.
Feel free to change the default theme, which I currently have set to theme_minimal().
Use good coding practice. Read the short sections on good code with pipes and ggplot2. This is part of your grade!
When you are finished with ALL the exercises, uncomment the options at the top so your document looks nicer. Don’t do it before then, or else you might miss some important warnings and messages.
These exercises will reiterate what you learned in the “Mapping data with R” tutorial. If you haven’t gone through the tutorial yet, you should do that first.
ggmap)
Add the Starbucks locations to a world map. Add an aesthetic to the world map that sets the color of the points according to the ownership type. What, if anything, can you deduce from this visualization?
It can be deduced either that different types of ownership are only available in certain locations or that different types of ownership are more popular in certain locations. This can be seen from the fact that there are almost no Joint Venture locations in the western hemisphere. The map also displays a lack of Franchise locations, which is interesting considering that there are many Franchise locations in the data set.
# World basemap for the Starbucks locations.
# The right edge must stay below 180 degrees longitude: the previous value
# (207.1) made ggmap request tile column 4 at zoom 2, which does not exist
# (only columns 0-3 do), so every row of the map logged an HTTP 404 and the
# right-hand strip of the map could not be drawn.
# NOTE(review): Stamen-hosted tiles have reportedly moved to Stadia Maps;
# confirm get_stamenmap() still works with the installed ggmap version.
starbucks_map <- get_stamenmap(
  bbox = c(left = -177.2, bottom = -62.4, right = 179.9, top = 81.1),
  maptype = "terrain",
  zoom = 2
)
## Source : http://tile.stamen.com/terrain/2/0/0.png
## Source : http://tile.stamen.com/terrain/2/1/0.png
## Source : http://tile.stamen.com/terrain/2/2/0.png
## Source : http://tile.stamen.com/terrain/2/3/0.png
## Source : http://tile.stamen.com/terrain/2/4/0.png
## Not Found (HTTP 404). Failed to aquire tile /terrain/2/4/0.png.
## Source : http://tile.stamen.com/terrain/2/0/1.png
## Source : http://tile.stamen.com/terrain/2/1/1.png
## Source : http://tile.stamen.com/terrain/2/2/1.png
## Source : http://tile.stamen.com/terrain/2/3/1.png
## Source : http://tile.stamen.com/terrain/2/4/1.png
## Not Found (HTTP 404). Failed to aquire tile /terrain/2/4/1.png.
## Source : http://tile.stamen.com/terrain/2/0/2.png
## Source : http://tile.stamen.com/terrain/2/1/2.png
## Source : http://tile.stamen.com/terrain/2/2/2.png
## Source : http://tile.stamen.com/terrain/2/3/2.png
## Source : http://tile.stamen.com/terrain/2/4/2.png
## Not Found (HTTP 404). Failed to aquire tile /terrain/2/4/2.png.
# Every Starbucks in the data set drawn on the world basemap, colored by
# ownership type. A title is added because all graphs need labels.
ggmap(starbucks_map) +
  geom_point(
    data = Starbucks,
    aes(x = Longitude, y = Latitude, color = `Ownership Type`),
    size = 0.3
  ) +
  labs(title = "Starbucks locations worldwide by ownership type") +
  theme_map() +
  # Drop the legend's opaque box so it does not hide the map underneath.
  theme(legend.background = element_blank())
## Warning: Removed 1 rows containing missing values (geom_point).
# Terrain basemap for the metro-area bounding box shared by the tc_* maps.
tc_starbucks_map <- get_stamenmap(
  zoom = 10,
  maptype = "terrain",
  bbox = c(left = -93.8800, bottom = 44.6226, right = -92.3832, top = 45.2496)
)
## Source : http://tile.stamen.com/terrain/10/244/367.png
## Source : http://tile.stamen.com/terrain/10/245/367.png
## Source : http://tile.stamen.com/terrain/10/246/367.png
## Source : http://tile.stamen.com/terrain/10/247/367.png
## Source : http://tile.stamen.com/terrain/10/248/367.png
## Source : http://tile.stamen.com/terrain/10/249/367.png
## Source : http://tile.stamen.com/terrain/10/244/368.png
## Source : http://tile.stamen.com/terrain/10/245/368.png
## Source : http://tile.stamen.com/terrain/10/246/368.png
## Source : http://tile.stamen.com/terrain/10/247/368.png
## Source : http://tile.stamen.com/terrain/10/248/368.png
## Source : http://tile.stamen.com/terrain/10/249/368.png
## Source : http://tile.stamen.com/terrain/10/244/369.png
## Source : http://tile.stamen.com/terrain/10/245/369.png
## Source : http://tile.stamen.com/terrain/10/246/369.png
## Source : http://tile.stamen.com/terrain/10/247/369.png
## Source : http://tile.stamen.com/terrain/10/248/369.png
## Source : http://tile.stamen.com/terrain/10/249/369.png
# Starbucks locations on the metro-area basemap.
# The full Starbucks table is passed in, so the "Removed ... rows" warning
# is expected: it counts the stores that fall outside the map's extent.
ggmap(tc_starbucks_map) +
  geom_point(
    data = Starbucks,
    aes(x = Longitude, y = Latitude),
    size = 0.6
  ) +
  labs(title = "Starbucks locations in the Twin Cities metro area") +
  theme_map()
## Warning: Removed 25452 rows containing missing values (geom_point).
I cannot choose a zoom number larger than 10 because it gives me an error saying that too many tiles are needed and tells me to pick a different zoom. The original zoom was 10, but as soon as the zoom starts to go down, the amount of detail in the map decreases and the size of the labels increases. The change in the size of the labels on the map, and how they are scaled, is a great way to check whether the zoom is appropriate for the size of the map. As the zoom number gets smaller, the amount of detail gets exponentially smaller. This is a great tool that lets the level of detail change as the size of the map changes, but a small zoom provides little to no information for those who want to see more detail.
# Same bounding box as the zoom-10 metro map, fetched at zoom 7 to show how
# a smaller zoom number reduces the detail of the basemap.
small_zoom_tc_map <- get_stamenmap(
  bbox = c(left = -93.8800, bottom = 44.6226, right = -92.3832, top = 45.2496),
  maptype = "terrain",
  zoom = 7
)
## Source : http://tile.stamen.com/terrain/7/30/45.png
## Source : http://tile.stamen.com/terrain/7/31/45.png
## Source : http://tile.stamen.com/terrain/7/30/46.png
## Source : http://tile.stamen.com/terrain/7/31/46.png
# Titled and themed like the other maps in this document.
ggmap(small_zoom_tc_map) +
  labs(title = "Twin Cities metro area at zoom 7") +
  theme_map()
get_stamenmap() in help and look at maptype). Include a map with one of the other map types.
# Same metro-area bounding box, drawn with the "watercolor" tile set.
tc_map <- get_stamenmap(
  zoom = 10,
  maptype = "watercolor",
  bbox = c(left = -93.8800, bottom = 44.6226, right = -92.3832, top = 45.2496)
)
## Source : http://tile.stamen.com/watercolor/10/244/367.jpg
## Source : http://tile.stamen.com/watercolor/10/245/367.jpg
## Source : http://tile.stamen.com/watercolor/10/246/367.jpg
## Source : http://tile.stamen.com/watercolor/10/247/367.jpg
## Source : http://tile.stamen.com/watercolor/10/248/367.jpg
## Source : http://tile.stamen.com/watercolor/10/249/367.jpg
## Source : http://tile.stamen.com/watercolor/10/244/368.jpg
## Source : http://tile.stamen.com/watercolor/10/245/368.jpg
## Source : http://tile.stamen.com/watercolor/10/246/368.jpg
## Source : http://tile.stamen.com/watercolor/10/247/368.jpg
## Source : http://tile.stamen.com/watercolor/10/248/368.jpg
## Source : http://tile.stamen.com/watercolor/10/249/368.jpg
## Source : http://tile.stamen.com/watercolor/10/244/369.jpg
## Source : http://tile.stamen.com/watercolor/10/245/369.jpg
## Source : http://tile.stamen.com/watercolor/10/246/369.jpg
## Source : http://tile.stamen.com/watercolor/10/247/369.jpg
## Source : http://tile.stamen.com/watercolor/10/248/369.jpg
## Source : http://tile.stamen.com/watercolor/10/249/369.jpg
# Watercolor map, titled and themed like the other maps in this document.
ggmap(tc_map) +
  labs(title = "Twin Cities metro area, watercolor map type") +
  theme_map()
annotate() function (see ggplot2 cheatsheet).
# Label-free terrain basemap, so the annotation added later is the only
# text on the map.
tc_mac_map <- get_stamenmap(
  zoom = 10,
  maptype = "terrain-background",
  bbox = c(left = -93.8800, bottom = 44.6226, right = -92.3832, top = 45.2496)
)
## Source : http://tile.stamen.com/terrain-background/10/244/367.png
## Source : http://tile.stamen.com/terrain-background/10/245/367.png
## Source : http://tile.stamen.com/terrain-background/10/246/367.png
## Source : http://tile.stamen.com/terrain-background/10/247/367.png
## Source : http://tile.stamen.com/terrain-background/10/248/367.png
## Source : http://tile.stamen.com/terrain-background/10/249/367.png
## Source : http://tile.stamen.com/terrain-background/10/244/368.png
## Source : http://tile.stamen.com/terrain-background/10/245/368.png
## Source : http://tile.stamen.com/terrain-background/10/246/368.png
## Source : http://tile.stamen.com/terrain-background/10/247/368.png
## Source : http://tile.stamen.com/terrain-background/10/248/368.png
## Source : http://tile.stamen.com/terrain-background/10/249/368.png
## Source : http://tile.stamen.com/terrain-background/10/244/369.png
## Source : http://tile.stamen.com/terrain-background/10/245/369.png
## Source : http://tile.stamen.com/terrain-background/10/246/369.png
## Source : http://tile.stamen.com/terrain-background/10/247/369.png
## Source : http://tile.stamen.com/terrain-background/10/248/369.png
## Source : http://tile.stamen.com/terrain-background/10/249/369.png
# Mark Macalester College with a point and put its name just above it
# (the text sits 0.02 degrees of latitude north of the point).
ggmap(tc_mac_map) +
  annotate("point", x = -93.1691, y = 44.9379) +
  annotate(
    "text",
    label = "Macalester College",
    x = -93.1691,
    y = 44.9579
  ) +
  theme_map()
geom_map())
The example I showed in the tutorial did not account for the population of each state in the map. In the code below, a new variable, starbucks_per_10000, is created that gives the number of Starbucks per 10,000 people. It is in the starbucks_with_2018_pop_est dataset.
# 2018 state population estimates.
# separate() splits `state` at its first run of non-alphanumeric characters;
# presumably each name arrives with a leading "." (hence the throw-away
# `dot` column) - verify against the raw file. extra = "merge" keeps
# multi-word names such as "New York" in one piece.
census_pop_est_2018 <- read_csv(
  file = "https://www.dropbox.com/s/6txwv3b4ng7pepe/us_census_2018_state_pop_est.csv?dl=1"
) %>%
  separate(col = state, into = c("dot", "state"), extra = "merge") %>%
  select(-dot) %>%
  # Lower-case names match the state_name key used in the join below.
  mutate(across(state, str_to_lower))
## Rows: 51 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (1): est_pop_2018
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach each state's 2018 population estimate to its Starbucks count and
# express the count per 10,000 residents.
starbucks_with_2018_pop_est <- left_join(
  starbucks_us_by_state,
  census_pop_est_2018,
  by = c("state_name" = "state")
) %>%
  mutate(starbucks_per_10000 = n / est_pop_2018 * 10000)
dplyr review: Look through the code above and describe what each line of code does.
The first line of code reads in a data set from a Dropbox web address. The data set contains the estimated population for each state in the map. It also assigns the data set to the name ‘census_pop_est_2018’. The ‘state’ column is then separated into two variables, ‘dot’ and ‘state’, which hold the two pieces of the original state value. Then the variable ‘dot’ is dropped by the select, because there is a minus sign in front of ‘dot’. The strings held by the state variable are all changed to lowercase. This makes the later join with the other data set easier, because the state names then match.
After the ‘census_pop_est_2018’ was wrangled to completion, a new data set, the ‘starbucks_with_2018_pop_est’ was created from the ‘starbucks_us_by_state’ which was wrangled further. The ‘starbucks_us_by_state’ data set was left_joined with the ‘census_pop_est_2018’ data set, the data sets were joined by the variable ‘state_name’. The ‘census_pop_est_2018’ data set’s state name variable was named ‘state’ so in the ‘by = …’ code, it indicates that the variable name will be ‘state_name’ but the name of the joining variable in another data table was different. The mutate at the end creates the new variable ‘starbucks_per_10000’ which calculates the number of Starbucks locations in that state according to the state’s estimated population.
WHAT MAP DATA DO I USE HERE?
leaflet)
# Audrey's favorite places, one row per place; top3 flags the three
# favorites among them.
audrey_fav_places <- tribble(
  ~place,                   ~long,       ~lat,      ~top3,
  "Art Museum",             -87.629798,  41.878114, FALSE,
  "Illinois Home",          -87.840625,  42.258634, FALSE,
  "Hot Shop Glass",         -87.782852,  42.726131, FALSE,
  "Chocolate Factory",      -88.231481,  43.011678, FALSE,
  "Wisconsin Home",         -88.216903,  43.054206, TRUE,
  "DC Beach",               -87.377049,  44.83413,  FALSE,
  "Qdoba",                  -88.403708,  43.060842, FALSE,
  "University Lake School", -88.34204,   43.105008, TRUE,
  "Breski's",               -88.499266,  43.111673, FALSE,
  "Macalester College",     -93.1712321, 44.9378965, TRUE
)
# Palette with one color for top-3 places and one for the rest.
pal <- colorFactor(
  palette = "viridis",
  domain = audrey_fav_places$top3
)

# Interactive map of the favorite places: one circle per place (hover to
# see its name), colored by top-3 status.
leaflet(data = audrey_fav_places) %>%
  addTiles() %>%
  addCircles(
    lng = ~long,
    lat = ~lat,
    label = ~place,
    opacity = 1,
    weight = 10,
    color = ~pal(top3)
  ) %>%
  # The legend previously showed only TRUE/FALSE; the title says what the
  # colors mean.
  addLegend(
    position = "bottomleft",
    pal = pal,
    values = ~top3,
    title = "Top 3 favorite?"
  ) %>%
  # Connect the places in the order they are listed in the tibble.
  addPolylines(
    lng = ~long,
    lat = ~lat,
    color = col2hex("maroon")
  )
This section will revisit some datasets we have used previously and bring in a mapping component.
The data come from Washington, DC and cover the last quarter of 2014.
Two data tables are available:
Trips contains records of individual rentals.
Stations gives the locations of the bike rental stations.
Here is the code to read in the data. We do this a little differently than usual, which is why it is included here rather than at the top of this file. To avoid repeatedly re-reading the files, start the data import chunk with {r cache = TRUE} rather than the usual {r}. This code reads in the large dataset right away.
# Bike-rental data for Washington, DC (last quarter of 2014).
data_site <-
  "https://www.macalester.edu/~dshuman1/data/112/2014-Q4-Trips-History-Data.rds"
# gzcon() decompresses the .rds stream as it is read from the URL
# connection; readRDS() does not do that for url() connections itself.
Trips <- readRDS(gzcon(url(data_site)))
# Station locations. Fetched over https, like the other course downloads
# from this host, instead of plain http.
Stations <- read_csv("https://www.macalester.edu/~dshuman1/data/112/DC-Stations.csv")
## Rows: 347 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): name
## dbl (4): lat, long, nbBikes, nbEmptyDocks
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Stations to make a visualization of the total number of departures from each station in the Trips data. Use either color or size to show the variation in number of departures. This time, plot the points on top of a map. Use any of the mapping tools you’d like.
# Total number of departures per start station.
Small_Trips <- Trips %>%
  group_by(sstation) %>%
  summarise(num_departures = n()) %>%
  ungroup()

# Attach each station's coordinates. Start stations with no match in
# Stations keep NA coordinates here (left join).
New_Small_Trips <- Small_Trips %>%
  left_join(Stations, by = c("sstation" = "name")) %>%
  select(sstation, lat, long, num_departures)

# Continuous palette spanning all departure counts.
pal <- colorNumeric(
  palette = "viridis",
  domain = New_Small_Trips$num_departures
)

New_Small_Trips %>%
  # Stations without coordinates cannot be drawn; dropping them here,
  # rather than letting leaflet discard them, avoids the
  # "missing or invalid lat/lon" warning.
  filter(!is.na(lat), !is.na(long)) %>%
  leaflet() %>%
  addTiles() %>%
  addCircles(
    lng = ~long,
    lat = ~lat,
    # Hover text: station name and its departure count.
    label = ~paste0(sstation, ": ", num_departures),
    opacity = 1,
    weight = 10,
    color = ~pal(num_departures)
  ) %>%
  addLegend(
    position = "topright",
    pal = pal,
    values = ~num_departures,
    title = "Departures"
  )
## Warning in validateCoords(lng, lat, funcName): Data contains 12 rows with either
## missing or invalid lat/lon values and will be ignored
# pal <- colorFactor(palette = "viridis",
# domain = New_Small_Trips$num_departures)
#
# leaflet(data = New_Small_Trips) %>%
# addTiles() %>%
# addCircles(lng = ~long,
# lat = ~lat,
# opacity = 1,
# weight = 10,
# color = ~pal(num_departures)) %>%
# addLegend(position = "topright",
# pal = pal,
# values = ~num_departures)
The following exercises will use the COVID-19 data from the NYT.
Use geom_map here!!
Now add the population of each state to the dataset and color the states by most recent cumulative cases/10,000 people. See the code for doing this with the Starbucks data. You will need to make some modifications.
CHALLENGE Choose 4 dates spread over the time period of the data and create the same map as in exercise 12 for each of the dates. Display the four graphs together using faceting. What do you notice?
These exercises use the datasets MplsStops and MplsDemo from the carData library. Search for them in Help to find out more information.
MplsStops dataset to find out how many stops there were for each neighborhood and the proportion of stops that were for a suspicious vehicle or person. Sort the results from most to least number of stops. Save this as a dataset called mpls_suspicious and display the table.
# One row per neighborhood:
#   prop_suspicious - share of that neighborhood's stops whose problem is
#                     "suspicious", as a percentage (0-100)
#   num             - total number of stops in the neighborhood
# Summarising per neighborhood (instead of filtering to suspicious stops
# first) keeps neighborhoods that have no suspicious stops, with a
# prop_suspicious of 0, and yields exactly one row per neighborhood without
# needing distinct().
mpls_suspicious <- MplsStops %>%
  group_by(neighborhood) %>%
  summarise(
    prop_suspicious = (sum(problem == "suspicious") / n()) * 100,
    num = n()
  ) %>%
  arrange(desc(num))

# Display the table, as the exercise asks.
mpls_suspicious
leaflet map and the MplsStops dataset to display each of the stops on a map as a small point. Color the points differently depending on whether they were for suspicious vehicle/person or a traffic stop (the problem variable). HINTS: use addCircleMarkers, set stroke = FAlSE, use colorFactor() to create a palette.
# One color per value of the problem variable.
pal <- colorFactor(
  palette = "viridis",
  domain = MplsStops$problem
)

# Every stop drawn as a small, borderless marker colored by problem.
leaflet(data = MplsStops) %>%
  addTiles() %>%
  addCircleMarkers(
    radius = 2,
    stroke = FALSE,
    color = ~pal(problem),
    lng = ~long,
    lat = ~lat
  ) %>%
  addLegend(
    values = ~problem,
    pal = pal,
    position = "bottomright"
  )
eval=FALSE. Although it looks like it only links to the .sph file, you need the entire folder of files to create the mpls_nbhd data set. These data contain information about the geometries of the Minneapolis neighborhoods. Using the mpls_nbhd dataset as the base file, join the mpls_suspicious and MplsDemo datasets to it by neighborhood (careful, they are named different things in the different files). Call this new dataset mpls_all.
# Neighborhood geometries read from the local shapefile folder.
mpls_nbhd <- st_read(
  dsn = "Minneapolis_Neighborhoods/Minneapolis_Neighborhoods.shp",
  quiet = TRUE
)

# Start from the geometries and attach first the demographics, then the
# stop summary. The shapefile's key is BDNAME; both other tables call it
# neighborhood.
mpls_all <- left_join(
  left_join(mpls_nbhd, MplsDemo, by = c("BDNAME" = "neighborhood")),
  mpls_suspicious,
  by = c("BDNAME" = "neighborhood")
)
leaflet to create a map from the mpls_all data that colors the neighborhoods by prop_suspicious. Display the neighborhood name as you scroll over it. Describe what you observe in the map.
There is the value NA in the legend, and South Uptown is the only neighborhood that is filled with the NA color. There are also neighborhoods that have a higher proportion of suspicious stops than traffic stops, which means that those neighborhoods likely have a more present police force. The lower right quarter of the map shows a higher proportion of suspicious stops.
# Continuous palette over prop_suspicious; neighborhoods with no value are
# drawn in the palette's NA color.
pal <- colorNumeric(
  palette = "magma",
  domain = mpls_all$prop_suspicious
)

# Neighborhoods filled by prop_suspicious; hovering shows the neighborhood
# name (BDNAME).
leaflet(data = mpls_all) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~pal(prop_suspicious),
    fillOpacity = 0.8,
    label = ~BDNAME
  ) %>%
  addLegend(
    pal = pal,
    values = ~prop_suspicious,
    opacity = 0.8,
    # prop_suspicious is stored as a percentage, so say so in the legend
    # instead of leaving it untitled.
    title = "Suspicious stops (% of all stops)",
    position = "bottomleft"
  )
leaflet to create a map of your own choosing. Come up with a question you want to try to answer and use the map to help answer that question. Describe what your map shows.
DID YOU REMEMBER TO UNCOMMENT THE OPTIONS AT THE TOP?